library(tidycensus)
library(tidyverse)
library(readxl)

# Census variable codes requested from get_acs(), keyed by the short name the
# downstream code uses for each resulting column (`total`, `white`, ...).
# NOTE(review): the codes look like ACS table B02001/B03002/B03003 estimate
# variables - confirm the exact definitions with tidycensus::load_variables().
TARGET_CENSUS_VARS <- setNames(
  c("B02001_001E", "B03002_003E", "B03002_004E", "B03002_006E", "B03003_003E"),
  c("total", "white", "black", "asian", "hispanic")
)

# 2021 ZCTA-level values exactly as returned by get_acs() (the race columns
# are NOT divided by `total`), with every column suffixed "_2021".
# The previous mutate() only assigned each column to itself, so it was a no-op
# and has been removed.
race_zip_2021_RAW <- get_acs(
  geography = "zcta",
  variables = TARGET_CENSUS_VARS,
  year = 2021,
  output = "wide"
) |>
  rename_with(\(x) str_c(x, "_2021"))


# 2021 ZCTA-level shares: each race/ethnicity column is divided by `total`
# (which itself is left untouched), then every column gets a "_2021" suffix.
race_zip_2021 <- get_acs(
  geography = "zcta",
  variables = TARGET_CENSUS_VARS,
  year = 2021,
  output = "wide"
) |>
  mutate(across(c(white, black, hispanic, asian), \(count) count / total)) |>
  rename_with(\(nm) paste0(nm, "_2021"))


# 2021 county-level shares: race/ethnicity columns become fractions of `total`,
# and all columns are tagged with a "_2021" suffix for the later join.
acs_county_2021 <- NULL  # placeholder removed below; keeps the pipeline linear
race_county_2021 <- get_acs(
  geography = "county",
  variables = TARGET_CENSUS_VARS,
  year = 2021,
  output = "wide"
) |>
  mutate(across(c(white, black, hispanic, asian), \(count) count / total)) |>
  rename_with(\(nm) paste0(nm, "_2021"))
rm(acs_county_2021)



# 2011 ZCTA-level shares, suffixed "_2011".
# GEOID is trimmed to its last five characters before anything else.
# NOTE(review): presumably the 2011 ZCTA GEOIDs carry a prefix that the 2021
# ones do not, and this makes the two joinable - confirm.
race_zip_2011 <- get_acs(
  geography = "zcta",
  variables = TARGET_CENSUS_VARS,
  year = 2011,
  output = "wide"
) |>
  mutate(
    GEOID = str_sub(GEOID, -5),
    across(c(white, black, hispanic, asian), \(count) count / total)
  ) |>
  rename_with(\(nm) paste0(nm, "_2011"))



# 2011 county-level shares, suffixed "_2011".
# GEOID is reduced to its last five characters, then each race/ethnicity
# column is divided by `total`.
race_county_2011 <- get_acs(
  geography = "county",
  variables = TARGET_CENSUS_VARS,
  year = 2011,
  output = "wide"
) |>
  mutate(
    GEOID = str_sub(GEOID, -5),
    across(c(white, black, hispanic, asian), \(count) count / total)
  ) |>
  rename_with(\(nm) paste0(nm, "_2011"))



# RAW ----
# 2021 county-level values exactly as returned by get_acs() (race columns are
# not divided by `total`), with every column suffixed "_2021".
# The previous mutate() only assigned each column to itself, so it was a no-op
# and has been removed.
race_county_2021_RAW <- get_acs(
  geography = "county",
  variables = TARGET_CENSUS_VARS,
  year = 2021,
  output = "wide"
) |>
  rename_with(\(x) str_c(x, "_2021"))



# 2011 county-level values as returned by get_acs() (race columns are not
# divided by `total`), with GEOID trimmed to its last five characters and
# every column suffixed "_2011".
# The previous second mutate() only assigned each column to itself, so it was
# a no-op and has been removed.
race_county_2011_RAW <- get_acs(
  geography = "county",
  variables = TARGET_CENSUS_VARS,
  year = 2011,
  output = "wide"
) |>
  mutate(GEOID = str_sub(GEOID, -5)) |>
  rename_with(\(x) str_c(x, "_2011"))





# ZIP-level change table, 2011 -> 2021.
# A full join keeps ZCTAs that appear in only one of the two years; their
# differences come out as NA. The output has one id column (`zip`), the 2021
# population, and the 2021-minus-2011 differences, all prefixed "zip_".
res_zip <- race_zip_2011 |>
  full_join(race_zip_2021, by = c("GEOID_2011" = "GEOID_2021")) |>
  mutate(
    zip_population_change = total_2021 - total_2011,
    zip_change_in_pct_white = white_2021 - white_2011,
    zip_change_in_pct_black = black_2021 - black_2011,
    zip_change_in_pct_hispanic = hispanic_2021 - hispanic_2011,
    zip_change_in_pct_asian = asian_2021 - asian_2011
  ) |>
  # Every source column carries a "_2011"/"_2021" suffix, so picking the id,
  # the 2021 total, and the freshly built "zip_" columns drops all of them.
  select(
    zip = GEOID_2011,
    zip_population = total_2021,
    starts_with("zip_")
  )



# County-level change table, 2011 -> 2021.
# A full join keeps counties present in only one year (their differences are
# NA). Besides the 2021-minus-2011 differences, this adds threshold dummies and
# copies of the 2011 shares. Only columns starting with "change" or
# "population" receive the "county_" prefix; the rest keep their names.
res_county1 <- full_join(
  race_county_2011,
  race_county_2021,
  by = c("GEOID_2011" = "GEOID_2021")
) |>
  mutate(
    population_change = total_2021 - total_2011,
    change_in_pct_white = white_2021 - white_2011,
    change_in_pct_black = black_2021 - black_2011,
    change_in_pct_hispanic = hispanic_2021 - hispanic_2011,
    change_in_pct_asian = asian_2021 - asian_2011,

    # Dummy variables on the 2011 (prior-period) white share: 1 when the share
    # is strictly above the cutoff, 0 otherwise, NA when the share is missing.
    above_97_white = as.integer(white_2011 > 0.97),
    above_95_white = as.integer(white_2011 > 0.95),
    above_90_white = as.integer(white_2011 > 0.90),
    above_85_white = as.integer(white_2011 > 0.85),
    above_80_white = as.integer(white_2011 > 0.80),
    above_75_white = as.integer(white_2011 > 0.75),

    # 1 when the white share was above 50% in 2011 and below 50% in 2021.
    white_majority_minority_flip =
      as.integer(white_2011 > 0.50 & white_2021 < 0.50),

    # 2011 shares copied under names that survive the suffix-based select().
    white_composition_prior = white_2011,
    black_composition_prior = black_2011,
    hispanic_composition_prior = hispanic_2011,
    asian_composition_prior = asian_2011
  ) |>
  rename(
    county = GEOID_2011,
    population = total_2021
  ) |>
  select(-ends_with("2021"), -ends_with("2011")) |>
  rename_with(
    .fn = \(x) str_c("county_", x),
    .cols = starts_with(c("change", "population"))
  )

# Quick distribution check of the 2011 county shares. These are continuous
# proportions, so summary() (min/quartiles/max/NA count) is used; table()
# would print one cell per distinct value.
summary(res_county1$white_composition_prior)
summary(res_county1$black_composition_prior)




# ZIP-to-county crosswalk, downloaded from
# https://www.huduser.gov/apps/public/uspscrosswalk/home
# For each ZIP, keep only the county row with the largest TOT_RATIO
# (with_ties = FALSE guarantees a single row), leaving ZIP and COUNTY.
crosswalk <- read_excel("Downloads/zip_county_032022.xlsx") |>
  select(ZIP, COUNTY, TOT_RATIO) |>
  group_by(ZIP) |>
  slice_max(TOT_RATIO, n = 1, with_ties = FALSE) |>
  ungroup() |>
  select(-TOT_RATIO)

# County lookup used only to attach state and county names; it could be
# dropped if those labels are not needed.
# STATE and COUNTY are read explicitly as character so that leading zeros in
# the codes never depend on readr's type guessing; they are then concatenated
# into a single COUNTY key. distinct() keeps just the three listed columns,
# one row per unique combination.
census_cc <- read_csv(
  "~/datasets/census/2020_population_estimates/CC-EST2020-ALLDATA6.csv",
  na = c("", "NA", "X"),
  col_types = cols(STATE = col_character(), COUNTY = col_character()),
  show_col_types = FALSE
) |>
  mutate(COUNTY = str_c(STATE, COUNTY)) |>
  distinct(STNAME, COUNTY, CTYNAME)

# Attach state and county names to each ZIP -> COUNTY pair.
# A county code that matches more than one row in census_cc would duplicate
# its ZIPs here and fan out the later join onto the ZIP table. Instead of
# deleting one row by its position (which breaks as soon as either input file
# changes), keep the first row per ZIP so the result is always one row per ZIP.
# NOTE(review): this keeps the first label when a ZIP is duplicated - confirm
# that matches the row the old positional drop was meant to keep.
crosswalk <- inner_join(crosswalk, census_cc, by = "COUNTY") |>
  select(ZIP, COUNTY, STATE = STNAME, COUNTY_LABEL = CTYNAME) |>
  distinct(ZIP, .keep_all = TRUE)


# Final table: ZIP-level changes, then the ZIP's county (via the crosswalk),
# then that county's change columns. Left joins keep every row of res_zip.
res <- left_join(
  left_join(res_zip, crosswalk, by = c("zip" = "ZIP")),
  res_county1,
  by = c("COUNTY" = "county")
)

# Quick structural check of the final table. glimpse() replaces table(res):
# table() on a whole data frame cross-tabulates every column against every
# other, which errors out (or exhausts memory) on a table like this and stops
# a sourced script before the file below is written.
glimpse(res)


write_tsv(res, "data/zip_pop_change.tsv")